*********************************************************************************
*   Replication code for Kovak, Oldenski, Sly                                   *
*   "The Labor Market Effects of Offshoring by U.S. Multinational Firms"		*
*																				*
*********************************************************************************


******** IV generating stage

* Load the affiliate-level panel. The clear option lets the script be re-run
* when another dataset is already in memory.
use KOS_data.dta, clear
*Note: this file contains confidential firm-level information and is available to researchers who have been cleared to access the Bureau of Economic Analysis (BEA) firm-level data on the operations of US multinational firms.

log using KOS_aff-level.log, replace

* Covariate lists shared by the specifications with controls.
local ctrls lsgdp lgdfsq lskldf ltcost bit fta exrate
local sibs  parsibbtt sibsibbtt

* Affiliate fixed-effects regressions of ln_emp on btt with i.sicyear dummies,
* standard errors clustered two-way on ctry and year.

* btt only: full sample
xi: xtivreg2 ln_emp btt i.sicyear, i(aff_id) cluster(ctry year) fe
test btt

* btt only: high_rid_dummy==1 subsample
xi: xtivreg2 ln_emp btt i.sicyear if high_rid_dummy==1, i(aff_id) cluster(ctry year) fe
test btt

* btt only: high_rid_dummy==0 subsample
xi: xtivreg2 ln_emp btt i.sicyear if high_rid_dummy==0, i(aff_id) cluster(ctry year) fe
test btt

* With controls plus parsibbtt and sibsibbtt: full sample.
* The test is joint over btt, parsibbtt and sibsibbtt.
xi: xtivreg2 ln_emp btt `ctrls' `sibs' i.sicyear, ///
    i(aff_id) cluster(ctry year) fe
test btt `sibs'

* With controls: high_rid_dummy==1 subsample
xi: xtivreg2 ln_emp btt `ctrls' `sibs' i.sicyear if high_rid_dummy==1, ///
    i(aff_id) cluster(ctry year) fe
test btt `sibs'

* With controls: high_rid_dummy==0 subsample
xi: xtivreg2 ln_emp btt `ctrls' `sibs' i.sicyear if high_rid_dummy==0, ///
    i(aff_id) cluster(ctry year) fe
test btt `sibs'

log close



******** Collapse to parent level using smearing technique

***** Weight control variables by pre-btt employment shares

foreach v in lsgdp lgdfsq lskldf ltcost bit fta exrate {
    gen `v'_pretreatwght = `v'*pre_btt_emp_share
}

gen ln_emp_par = ln(emp_par)


***** Smearing

*** original regression with logged dependent variable

xi: xtivreg2 ln_emp btt lsgdp lgdfsq lskldf ltcost bit fta exrate parsibbtt sibsibbtt i.sicyear if high_rid_dummy==1, ///
    i(aff_id) cluster(ctry year) fe

* Squared RMSE of the regression above is used as the residual variance.
disp (e(rmse))^2
scalar sigma2_hat = (e(rmse))^2
predict lnemp_hat, xb


*** smearing estimator, incorporating nonlinearity

* step 1: convert the predicted log to levels. These are the mean and
* variance of a lognormal variable with log-mean lnemp_hat and log-variance
* sigma2_hat.

gen smear_emp_hat = exp(lnemp_hat + 0.5*sigma2_hat)
sum smear_emp_hat
gen smear_var_emp_hat = (exp(sigma2_hat)-1)*exp(2*lnemp_hat + sigma2_hat)
sum smear_var_emp_hat
* NOTE(review): a bare "save, replace" overwrites the file most recently
* loaded or saved (KOS_data.dta when run from the top of this script), so the
* variables generated above are written back into that file.
save, replace


* step 2: collapse to parent level
keep if high_rid_dummy==1

collapse (sum) smear_emp_hat smear_var_emp_hat emp lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght ///
    (mean) emp_par ln_emp_par , by(us_id sic year)

* collapse keeps only the by-variables and the aggregated variables, so
* sicyear is gone at this point. The parent-level regressions below use
* i.sicyear, so rebuild an identifier for each sic-year cell.
* NOTE(review): assumes sicyear in the affiliate data identifies sic x year
* cells - confirm.
egen sicyear = group(sic year)


* step 3: log of the summed level prediction, adjusted using the summed variance

gen ln_emp_hat = ln(smear_emp_hat) - 0.5*ln((smear_var_emp_hat / smear_emp_hat^2) + 1)
gen ln_emp_aff = ln(emp)
save KOS_parent-level_smear_high-RID_only.dta, replace


******** Parent-level IV regression:

log using KOS_2SLS_parent-level_high_RID_only.log, replace

* Employment-share-weighted controls, as built before the collapse.
local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Parent fixed effects; ln_emp_aff is instrumented with ln_emp_hat; first-stage
* output requested; two-way clustering on us_id and year.

*with controls
xi: xtivreg2 ln_emp_par `wctrl' i.sicyear (ln_emp_aff = ln_emp_hat), ///
    first i(us_id) cluster(us_id year) fe

*no controls
xi: xtivreg2 ln_emp_par i.sicyear (ln_emp_aff = ln_emp_hat), ///
    first i(us_id) cluster(us_id year) fe

log close


***new vs continuing affs:

log using KOS_2SLS_parent-level_new_vs_continuing_aff_high_RID_only.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* NOTE(review): both regressions condition on new_aff_dummy, which must be
* present in the data in memory. The parent-level collapse earlier in this
* script (by us_id sic year) does not list it - confirm how it is meant to be
* carried to this level.

*new aff
xi: xtivreg2 ln_emp_par `wctrl' i.sicyear (ln_emp_aff = ln_emp_hat) if new_aff_dummy==1, ///
    first i(us_id) cluster(us_id year) fe

*continuing aff
xi: xtivreg2 ln_emp_par `wctrl' i.sicyear (ln_emp_aff = ln_emp_hat) if new_aff_dummy==0, ///
    first i(us_id) cluster(us_id year) fe

log close


***OLS

log using KOS_OLS_parent-level_high_RID_only.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Same specifications as the parent-level 2SLS, but ln_emp_aff enters as an
* ordinary regressor (no instrument).

*with controls
xi: xtivreg2 ln_emp_par ln_emp_aff `wctrl' i.sicyear, ///
    i(us_id) cluster(us_id year) fe

*no controls
xi: xtivreg2 ln_emp_par ln_emp_aff i.sicyear, ///
    i(us_id) cluster(us_id year) fe

log close


******** Industry-level IV regressions:

* The clear option is required because the previous section leaves modified
* data in memory.
use KOS_data.dta, clear

* step 1 of smearing

* The parent-level section writes its generated variables back into the data
* file with a bare "save, replace". Drop any such leftovers so the gen and
* predict calls below do not stop with "variable already defined".
foreach v in ln_emp_par lnemp_hat smear_emp_hat smear_var_emp_hat {
    capture confirm variable `v', exact
    if !_rc {
        drop `v'
    }
}

***** Weight control variables by pre-btt employment shares

foreach v in lsgdp lgdfsq lskldf ltcost bit fta exrate {
    capture confirm variable `v'_pretreatwght, exact
    if !_rc {
        drop `v'_pretreatwght
    }
    gen `v'_pretreatwght = `v'*pre_btt_emp_share_ind
}

gen ln_emp_par = ln(emp_par)


***** Smearing

*** original regression with logged dependent variable

xi: xtivreg2 ln_emp btt lsgdp lgdfsq lskldf ltcost bit fta exrate parsibbtt sibsibbtt i.sicyear if high_rid_dummy==1, ///
    i(aff_id) cluster(ctry year) fe

disp (e(rmse))^2
scalar sigma2_hat = (e(rmse))^2
predict lnemp_hat, xb


*** smearing estimator, incorporating nonlinearity

* step 1
gen smear_emp_hat = exp(lnemp_hat + 0.5*sigma2_hat)
sum smear_emp_hat
gen smear_var_emp_hat = (exp(sigma2_hat)-1)*exp(2*lnemp_hat + sigma2_hat)
sum smear_var_emp_hat


* step 2: collapse to sic x year cells

keep if high_rid_dummy==1
collapse (sum) smear_emp_hat smear_var_emp_hat emp lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght ///
    (mean) emp_par ln_emp_par, by(sic year)


* step 3

gen ln_emp_ind_hat = ln(smear_emp_hat) - 0.5*ln((smear_var_emp_hat / smear_emp_hat^2) + 1)
gen ln_emp_aff = ln(emp)
sum ln_emp_aff ln_emp_ind_hat
corr ln_emp_aff ln_emp_ind_hat
* File name matches the one the MSA-level section reads with joinby
* (the original had a stray trailing underscore before .dta).
save KOS_ind-level_smear_high_RID.dta, replace


*** merge in industry-level CBP data

* NOTE(review): hard-coded H:\ drive path; every other file in this script is
* referenced relative to the working directory.
sort sic year
merge sic year using H:\KOS_ind-level_emp_cbp.dta
tab _merge
drop if _merge==2
drop _merge
gen ln_emp_ind_cbp_0 = ln(emp_0)
gen ln_emp_ind_cbp_i = ln(emp_i)
xtset sic
* Bare save: overwrites the smear file saved above with the merged data.
save, replace


log using KOS_2SLS_ind-level_high_RID.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Industry (sic) fixed effects with year dummies; ln_emp_aff is instrumented
* with ln_emp_ind_hat; two-way clustering on sic and year.

*With controls
xi: xtivreg2 ln_emp_ind_cbp_i `wctrl' i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_ind_cbp_i i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

log close


****Industry-level separating MNE emp vs non-MNE emp

* Split emp_i into the emp_mne part and the remainder, then take logs.
gen emp_domestic = emp_i - emp_mne
gen ln_emp_mne = ln(emp_mne)
gen ln_emp_domestic = ln(emp_domestic)
gen mne_share = emp_mne/emp_i
save, replace


log using KOS_2SLS_ind-level_mne_vs_non-mne_high_RID.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

***MNE US employment only

*With controls
xi: xtivreg2 ln_emp_mne `wctrl' i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_mne i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

***non-MNE US employment only

*With controls
xi: xtivreg2 ln_emp_domestic `wctrl' i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_domestic i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

log close



***ind-level OLS

log using KOS_OLS_ind-level_high_RID.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Same outcome and fixed effects as the industry-level 2SLS, with ln_emp_aff
* entered directly rather than instrumented.

*With controls
xi: xtivreg2 ln_emp_ind_cbp_i ln_emp_aff `wctrl' i.year, ///
    i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_ind_cbp_i ln_emp_aff i.year, ///
    i(sic) cluster(sic year) fe

log close



******** MSA-level IV regressions:

* The clear option is required because the industry-level section leaves
* modified data in memory.
use KOS_cbp_newmsa_ind_weight_1986.dta, clear

* combine industry x year shocks with weights
* joinby forms every pairing of master and using observations that share sic.
* NOTE(review): this file name must match the file saved by the
* industry-level smearing step - confirm.

sort sic
joinby sic using KOS_ind-level_smear_high_RID.dta

*this is predicted affiliate employment at year x sic3 level

* calculate newmsa weighted averages

gen msa_emp_hat_i= empshare1986_i * smear_emp_hat
gen msa_emp_aff_i= empshare1986_i * emp

* Weight each control by empshare1986_i and collect the new names so the
* collapse below lists exactly the variables created here.
local msa_ctrls
foreach v in lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght {
    gen msa_`v'_i = empshare1986_i *`v'
    local msa_ctrls `msa_ctrls' msa_`v'_i
}

collapse (sum) msa_emp_hat_i msa_emp_aff_i `msa_ctrls', by(newmsa year)

gen ln_msa_emp_hat_i = ln(msa_emp_hat_i)
gen ln_msa_emp_aff_i = ln(msa_emp_aff_i)
sort newmsa year
save msa_ln_emp_hat_shocks_high_RID, replace


*** KOS_cbp_newmsa_year.dta are the msa-level outcome data

sort newmsa year
merge newmsa year using KOS_cbp_newmsa_year.dta
tab _merge
keep if _merge==3
drop _merge
* emp here comes from the merged file; the collapsed data above has no emp.
gen ln_emp_cbp = ln(emp)
save KOS_msa_level_data_high_RID.dta, replace


***MSA level regressions

* replace added so a re-run does not stop on an existing log file, matching
* every other log in this script.
log using KOS_2SLS_msa-level_high_RID.log, replace

xtset newmsa

* Variables are referenced by their full names (with the _i suffix they are
* created with) rather than relying on Stata's variable-name abbreviation,
* which fails under "set varabbrev off".
local msa_ctrls msa_lsgdp_pretreatwght_i msa_lgdfsq_pretreatwght_i msa_lskldf_pretreatwght_i msa_ltcost_pretreatwght_i msa_bit_pretreatwght_i msa_fta_pretreatwght_i msa_exrate_pretreatwght_i

*with controls
xi: xtivreg2 ln_emp_cbp `msa_ctrls' i.year (ln_msa_emp_aff_i = ln_msa_emp_hat_i), ///
    first i(newmsa) cluster(newmsa year) fe

*no controls
xi: xtivreg2 ln_emp_cbp i.year (ln_msa_emp_aff_i = ln_msa_emp_hat_i), ///
    first i(newmsa) cluster(newmsa year) fe

log close


***msa-level ols

log using KOS_OLS_msa-level_high_RID.log, replace

* Full variable names (with the _i suffix) are used instead of abbreviations,
* so the commands also run under "set varabbrev off".
local msa_ctrls msa_lsgdp_pretreatwght_i msa_lgdfsq_pretreatwght_i msa_lskldf_pretreatwght_i msa_ltcost_pretreatwght_i msa_bit_pretreatwght_i msa_fta_pretreatwght_i msa_exrate_pretreatwght_i

* The first option is omitted: these regressions have no endogenous
* regressor, so there is no first stage to report (consistent with the
* parent- and industry-level OLS sections).

*with controls
xi: xtivreg2 ln_emp_cbp ln_msa_emp_aff_i `msa_ctrls' i.year, ///
    i(newmsa) cluster(newmsa year) fe

*no controls
xi: xtivreg2 ln_emp_cbp ln_msa_emp_aff_i i.year, ///
    i(newmsa) cluster(newmsa year) fe
log close


******** Reduced form results

use KOS_data.dta, clear

* The collapse below needs the weighted controls and ln_emp_par. They are in
* KOS_data.dta only if the parent-level section has already written them
* there; build them here when absent, using the same formulas as that section.
foreach v in lsgdp lgdfsq lskldf ltcost bit fta exrate {
    capture confirm variable `v'_pretreatwght, exact
    if _rc {
        gen `v'_pretreatwght = `v'*pre_btt_emp_share
    }
}
capture confirm variable ln_emp_par, exact
if _rc {
    gen ln_emp_par = ln(emp_par)
}

* high_rid_dummy is carried through the collapse because the regressions
* below condition on it.
* NOTE(review): (max) assumes the dummy does not vary within a
* us_id-sic-year cell - confirm.
collapse (mean) emp_par ln_emp_par ///
    (sum) lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght ///
    (max) first_treatment first_treatment_weighted btt_rf_dummy btt_rf_dummy_weighted high_rid_dummy, by(us_id sic year)

* collapse drops sicyear; rebuild a sic-year cell identifier for i.sicyear.
* NOTE(review): assumes sicyear in the affiliate data identifies sic x year
* cells - confirm.
egen sicyear = group(sic year)

ren first_treatment t0
ren first_treatment_weighted t0_weighted

sort us_id year

* Leads (tminus) and lags (tplus) of t0_weighted within us_id, by row offset.
* Fixes: the range is written 1/20 ("1 to 20" is not valid forvalues syntax)
* and the macro is quoted as `v' so each lead gets its own variable name.
* NOTE(review): offsets are by observation position within us_id, so this
* presumes one row per us_id-year with no gaps - confirm.
forvalues v = 1/20 {
    by us_id: gen tminus`v'_weighted =  t0_weighted[_n+`v']
    by us_id: gen tplus`v'_weighted =  t0_weighted[_n-`v']
}

* Leads/lags that run off the ends of a panel are missing; set them to 0.
recode tplus* tminus* t0 (.=0)

* Pool event times 6 through 20 on each side into a single term.
gen tminus6plus_weighted = tminus6_weighted + tminus7_weighted + tminus8_weighted + tminus9_weighted + tminus10_weighted ///
    + tminus11_weighted + tminus12_weighted + tminus13_weighted + tminus14_weighted + tminus15_weighted ///
    + tminus16_weighted + tminus17_weighted + tminus18_weighted + tminus19_weighted + tminus20_weighted

gen tplus6plus_weighted = tplus6_weighted + tplus7_weighted + tplus8_weighted + tplus9_weighted + tplus10_weighted ///
    + tplus11_weighted + tplus12_weighted + tplus13_weighted + tplus14_weighted + tplus15_weighted ///
    + tplus16_weighted + tplus17_weighted + tplus18_weighted + tplus19_weighted + tplus20_weighted

* replace added so a re-run does not stop on the existing file.
save KOS_reduced_form_parent-level.dta, replace


***coefficients for event study graph

log using KOS_reduced_form_weighted.log, replace

* Event-time terms: pooled 6+ leads, leads 5 to 2, period 0, lags 1 to 5,
* pooled 6+ lags. tminus1_weighted is not included in the regression.
local leads tminus6plus_weighted tminus5_weighted tminus4_weighted tminus3_weighted tminus2_weighted
local lags  tplus1_weighted tplus2_weighted tplus3_weighted tplus4_weighted tplus5_weighted tplus6plus_weighted
local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

xi: xtivreg2 ln_emp_par `leads' t0_weighted `lags' `wctrl' i.sicyear if high_rid_dummy==1, ///
    i(us_id) cluster(us_id year) fe

log close


***dif-in-dif

log using KOS_reduced_form_dif-in-dif_weighted.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Parent fixed-effects regressions of ln_emp_par on btt_rf_dummy_weighted.
* Each specification is run on the full sample, then on the
* high_rid_dummy==1 and high_rid_dummy==0 subsamples.

* without controls
xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted i.sicyear, ///
    i(us_id) cluster(us_id year) fe

xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted i.sicyear if high_rid_dummy==1, ///
    i(us_id) cluster(us_id year) fe

xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted i.sicyear if high_rid_dummy==0, ///
    i(us_id) cluster(us_id year) fe

* with weighted controls
xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted `wctrl' i.sicyear, ///
    i(us_id) cluster(us_id year) fe

xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted `wctrl' i.sicyear if high_rid_dummy==1, ///
    i(us_id) cluster(us_id year) fe

xi: xtivreg2 ln_emp_par btt_rf_dummy_weighted `wctrl' i.sicyear if high_rid_dummy==0, ///
    i(us_id) cluster(us_id year) fe

log close


******** Vertical MNE vs non-MNE results (industry-level)

* The clear option is required because the previous section leaves modified
* data in memory.
use KOS_data.dta, clear

keep if vert_dummy==1

* step 1 of smearing

* The parent-level section writes its generated variables back into the data
* file with a bare "save, replace". Drop any such leftovers so the gen and
* predict calls below do not stop with "variable already defined".
foreach v in ln_emp_par lnemp_hat smear_emp_hat smear_var_emp_hat {
    capture confirm variable `v', exact
    if !_rc {
        drop `v'
    }
}

***** Weight control variables by pre-btt employment shares

foreach v in lsgdp lgdfsq lskldf ltcost bit fta exrate {
    capture confirm variable `v'_pretreatwght, exact
    if !_rc {
        drop `v'_pretreatwght
    }
    gen `v'_pretreatwght = `v'*pre_btt_emp_share_ind
}

gen ln_emp_par = ln(emp_par)


***** Smearing

*** original regression with logged dependent variable

xi: xtivreg2 ln_emp btt lsgdp lgdfsq lskldf ltcost bit fta exrate parsibbtt sibsibbtt i.sicyear if high_rid_dummy==1, ///
    i(aff_id) cluster(ctry year) fe

disp (e(rmse))^2
scalar sigma2_hat = (e(rmse))^2
predict lnemp_hat, xb

*** smearing estimator, incorporating nonlinearity

* step 1

gen smear_emp_hat = exp(lnemp_hat + 0.5*sigma2_hat)
sum smear_emp_hat
gen smear_var_emp_hat = (exp(sigma2_hat)-1)*exp(2*lnemp_hat + sigma2_hat)
sum smear_var_emp_hat

* step 2: collapse to sic x year cells

keep if high_rid_dummy==1

collapse (sum) smear_emp_hat smear_var_emp_hat emp lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght ///
    (mean) emp_par ln_emp_par, by(sic year)

* step 3

gen ln_emp_ind_hat = ln(smear_emp_hat) - 0.5*ln((smear_var_emp_hat / smear_emp_hat^2) + 1)
gen ln_emp_aff = ln(emp)
sum ln_emp_aff ln_emp_ind_hat
corr ln_emp_aff ln_emp_ind_hat

save KOS_ind-level_smear_high_RID_vert.dta, replace


*** merge in industry-level CBP data

* NOTE(review): hard-coded H:\ drive path, and the file name differs from the
* CBP file merged in the non-vertical industry-level section - confirm both.
sort sic year
merge sic year using H:\KOS_ind-level_emp-wages_cbp.dta
tab _merge
drop if _merge==2
drop _merge
gen ln_emp_ind_cbp_0 = ln(emp_0)
gen ln_emp_ind_cbp_i = ln(emp_i)
xtset sic
* Bare save: overwrites the _vert smear file saved above with the merged data.
save, replace


****Ind level separating MNE emp vs non-MNE emp

gen emp_domestic = emp_i - emp_mne
gen ln_emp_domestic = ln(emp_domestic)
gen ln_emp_mne = ln(emp_mne)
gen mne_share = emp_mne/emp_i
save, replace

log using KOS_2SLS_ind-level_mne_vs_non-mne_high_RID_vert.log, replace

local wctrl lsgdp_pretreatwght lgdfsq_pretreatwght lskldf_pretreatwght ltcost_pretreatwght bit_pretreatwght fta_pretreatwght exrate_pretreatwght

* Industry fixed-effects 2SLS on the vert_dummy==1 sample; ln_emp_aff is
* instrumented with ln_emp_ind_hat.

***MNE US employment only

*With controls
xi: xtivreg2 ln_emp_mne `wctrl' i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_mne i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

***non-MNE US employment only

*With controls
xi: xtivreg2 ln_emp_domestic `wctrl' i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

*No controls
xi: xtivreg2 ln_emp_domestic i.year (ln_emp_aff = ln_emp_ind_hat), ///
    first i(sic) cluster(sic year) fe

log close



